import os
# Root of the NOVA code repository; the working directory is switched to it below.
NOVA_HOME = "/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA"
# Root of the FUNOVA data tree; the input and output paths in this notebook are built from it.
NOVA_DATA_HOME = '/home/labs/hornsteinlab/Collaboration/FUNOVA'
# Directory holding the preprocessing logs that are passed to log_files_qc.
LOGS_PATH = os.path.join(NOVA_DATA_HOME, "outputs/preprocessing/logs/")
# Directory handed to the QC helpers as their plot path.
PLOT_PATH = os.path.join(NOVA_DATA_HOME, "outputs/logs/")
# Work from the repository root: the relative notebook path used for the HTML export
# is resolved against it (presumably the `tools` imports rely on it too -- TODO confirm).
os.chdir(NOVA_HOME)
import pandas as pd
import numpy as np
import contextlib
import io
from IPython.display import display, Javascript
import seaborn as sns
from tools.preprocessing_tools.qc_reports.qc_utils import log_files_qc, run_validate_folder_structure, display_diff, sample_and_calc_variance, \
show_site_survival_dapi_brenner, show_site_survival_dapi_cellpose, \
show_site_survival_dapi_tiling, show_site_survival_target_brenner, \
calc_total_sums, plot_filtering_heatmap, show_total_sum_tables, \
plot_cell_count, plot_catplot, plot_hm_combine_batches, plot_hm, \
run_calc_hist_new, plot_marker_data, find_bad_wells,\
show_site_survival_by_brenner_on_dapi_tiles, show_site_survival_target_brenner_tiles
from tools.preprocessing_tools.qc_reports.qc_config import (
funova_markers as markers,
funova_cell_lines as cell_lines,
funova_cell_lines_to_cond as cell_lines_to_cond,
funova_cell_lines_for_disp as cell_lines_for_disp,
funova_reps as reps,
funova_line_colors as line_colors,
funova_lines_order as lines_order,
funova_custom_palette as custom_palette,
funova_expected_dapi_raw as expected_dapi_raw,
funova_panels as panels,
funova_marker_info as marker_info
)
%load_ext autoreload
%autoreload 2
# Choose the batches to include in this QC report.
batches = ['Batch3', 'Batch4']
batches
['Batch3', 'Batch4']
Note: I created a folder called 'Batch3' in the logs directory and put all the log files inside it.
# Flag forwarded to the folder-structure validation calls further down.
validate_antibody = False
# Load the preprocessing logs of the selected batches into a single DataFrame.
df = log_files_qc(
    LOGS_PATH,
    batches,
    only_wt_cond=False,
)
reading logs of Batch3 Total of 1 files were read. Before dup handeling (172528, 22) After duplication removal #1: (172528, 23) After duplication removal #2: (172528, 23)
# Keep only the part before the first '-' in the file name and in the site number.
df['filename'] = df['filename'].str.split('-').str[0]
df['site_num'] = df['site_num'].str.split('-').str[0]
# Split the log rows into DAPI rows and target-marker rows. Taking explicit
# copies makes both frames independent of `df`, so helpers that later add
# columns to them no longer raise pandas' SettingWithCopyWarning.
is_dapi = df['marker'] == 'DAPI'
df_dapi = df[is_dapi].copy()
df_target = df[~is_dapi].copy()
# Validate the folder structure of the raw images.
root_directory_raw = os.path.join(NOVA_DATA_HOME, 'input', 'images', 'raw')
# root_path = "/home/labs/hornsteinlab/Collaboration/FUNOVA/input/images/raw/"
# marker_info_df = create_marker_info_df(root_path)
# Drop any "_16bit_no_downsample" substring from the batch names before
# looking them up under the raw directory.
batches_raw = [name.replace("_16bit_no_downsample", "") for name in batches]
raws = run_validate_folder_structure(
    root_directory_raw,
    False,  # second positional flag: False here, True for the processed images
    panels,
    markers.copy(),  # pass a copy so the shared `markers` object is left untouched
    PLOT_PATH,
    marker_info,
    cell_lines_to_cond,
    reps,
    cell_lines_for_disp,
    expected_dapi_raw,
    batches=batches_raw,
    fig_width=8,
    fig_height=40,
    expected_count=100,
    validate_antibody=validate_antibody,
)
Batch3 Folder structure is valid. No bad files are found. Total Sites: 118400
======== Batch4 Folder structure is valid. No bad files are found. Total Sites: 118400
======== ====================
# Validate the folder structure of the processed images.
root_directory_proc = os.path.join(NOVA_DATA_HOME, 'input', 'images', 'processed')
# NOTE(review): the raw-image validation passes `markers.copy()` while this call
# passes `markers` itself -- confirm whether the helper mutates its argument.
procs = run_validate_folder_structure(
    root_directory_proc,
    True,  # second positional flag: True here, False for the raw images
    panels,
    markers,
    PLOT_PATH,
    marker_info,
    cell_lines_to_cond,
    reps,
    cell_lines_for_disp,
    expected_dapi_raw,
    batches=batches,
    fig_width=8,
    fig_height=40,
    expected_count=100,
    validate_antibody=validate_antibody,
)
Batch3 Folder structure is valid. No bad files are found. Total Sites: 65128
======== Batch4 Folder structure is valid. No bad files are found. Total Sites: 78140
======== ====================
# Show, per batch, the difference between the raw and processed validation results.
display_diff(
    batches,
    raws,
    procs,
    PLOT_PATH,
    fig_width=8,
    fig_height=40,
)
Batch3
======== Batch4
========
# Compute a variance figure per batch from a sample of the processed images.
for batch in batches:
    # Swallow anything the helper prints; only the summary line below is shown.
    with contextlib.redirect_stdout(io.StringIO()):
        var = sample_and_calc_variance(
            root_directory_proc,
            batch,
            sample_size_per_markers=200,
            cond_count=1,
            rep_count=len(reps),
            num_markers=len(markers),
        )
    # Printed outside the redirect so that it reaches the notebook output.
    print(f'{batch} var: ',var)
Batch3 var: 0.023869666121074315 Batch4 var: 0.021494905382684237
By order of filtering
Percentage out of the total sites
# First filtering step: DAPI site survival after the Brenner filter.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(
    df_dapi,
    batches,
    line_colors,
    panels,
    figsize=(6, 18),
    reps=reps,
    vmax=100,
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses.
A site will be filtered out if Cellpose found 0 cells in it.
# Next step: DAPI site survival after the Cellpose filter, relative to the
# sites that survived the Brenner filter.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(
    df_dapi,
    batches,
    dapi_filter_by_brenner,
    line_colors,
    panels,
    figsize=(6, 18),
    reps=reps,
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses.
A site is filtered out if, after tiling, no tile contains at least 85% of a cell that Cellpose detected.
# Next step: DAPI site survival after tiling, relative to the sites that
# survived the Cellpose filter.
dapi_filter_by_tiling = show_site_survival_dapi_tiling(
    df_dapi,
    batches,
    dapi_filter_by_cellpose,
    line_colors,
    panels,
    figsize=(6, 18),
    reps=reps,
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses (if they differ from the percentages).
# Next step: DAPI site survival after the Brenner filter applied on tiles,
# relative to the sites that survived tiling.
dapi_filter_by_brenner_tiles = show_site_survival_by_brenner_on_dapi_tiles(
    df_dapi,
    batches,
    dapi_filter_by_tiling,
    line_colors,
    panels,
    figsize=(6, 18),
    reps=reps,
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses (if they differ from the percentages).
# Target-marker site survival after the Brenner filter, using the DAPI
# tiling result as the reference.
show_site_survival_target_brenner(
    df_dapi,
    df_target,
    dapi_filter_by_tiling,
    figsize=(6, 24),
    markers=markers,
)
# List the DAPI rows whose valid-tile count differs before and after the
# tile-level Brenner filter (rows where the difference is NaN are listed too).
tile_count_columns = ['n_valid_tiles', 'n_valid_tiles_after_tiles_brenner']
tiles_removed = df_dapi[tile_count_columns[0]] - df_dapi[tile_count_columns[1]]
df_dapi.loc[(tiles_removed != 0).to_numpy(), tile_count_columns]
| n_valid_tiles | n_valid_tiles_after_tiles_brenner | |
|---|---|---|
| 54270 | 1 | 0.0 |
| 54291 | 2 | 0.0 |
| 54290 | 1 | 0.0 |
| 54239 | 3 | 0.0 |
| 54227 | 1 | 0.0 |
| ... | ... | ... |
| 111316 | 1 | 0.0 |
| 111345 | 1 | 0.0 |
| 111399 | 2 | 0.0 |
| 111427 | 4 | 3.0 |
| 111824 | 6 | 5.0 |
6411 rows × 2 columns
Find invalid wells
# Load the raw Brenner metrics, building the path from NOVA_DATA_HOME like the
# other data paths in this notebook instead of repeating the absolute prefix.
# NOTE(review): the file name says "exp3" while this report is saved as exp4 --
# confirm this is the intended metrics file.
brenner_metrics_path = os.path.join(
    NOVA_DATA_HOME, 'outputs', 'preprocessing', 'brenner', 'raw_metrics280125_exp3.csv'
)
dfb = pd.read_csv(brenner_metrics_path)
# Extract the panel letter (e.g. "D" from ".../panelD/...") from the Path column.
# expand=False returns a Series, which is what a single-column assignment needs.
dfb['Panel'] = dfb['Path'].str.extract(r'/panel([A-Z])/', expand=False)
find_bad_wells(dfb, threshold=10000, percentage_filter=60)
| CellLine | Marker | Count_table1 | Count_table2 | Percentage | Combination | Panel | Condition | Rep | Batch | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Control_1017118 | Aberrant-splicing | 580 | 800 | 72.50 | ['CellLine', 'Marker'] | NaN | NaN | NaN | NaN |
| 1 | Control_1017118 | Aberrant-splicing | 580 | 800 | 72.50 | ['Panel', 'CellLine', 'Marker'] | E | NaN | NaN | NaN |
| 2 | Control_1017118 | Aberrant-splicing | 318 | 400 | 79.50 | ['Condition', 'CellLine', 'Marker'] | NaN | Untreated | NaN | NaN |
| 3 | Control_1017118 | DNA-damage-pH2Ax | 306 | 400 | 76.50 | ['Condition', 'CellLine', 'Marker'] | NaN | Untreated | NaN | NaN |
| 4 | C9orf72-HRE_981344 | DNA-damage-P53BP1 | 273 | 400 | 68.25 | ['Condition', 'CellLine', 'Marker'] | NaN | stress | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 188 | C9orf72-HRE_1008566 | Senescence-signaling | 98 | 100 | 98.00 | ['Panel', 'Condition', 'CellLine', 'Rep', 'Bat... | L | stress | rep1 | Batch2 |
| 189 | C9orf72-HRE_981344 | DAPI | 91 | 100 | 91.00 | ['Panel', 'Condition', 'CellLine', 'Rep', 'Bat... | L | stress | rep1 | Batch2 |
| 190 | C9orf72-HRE_981344 | Protein-degradation | 93 | 100 | 93.00 | ['Panel', 'Condition', 'CellLine', 'Rep', 'Bat... | L | stress | rep1 | Batch2 |
| 191 | C9orf72-HRE_981344 | Senescence-signaling | 93 | 100 | 93.00 | ['Panel', 'Condition', 'CellLine', 'Rep', 'Bat... | L | stress | rep1 | Batch2 |
| 192 | Control_1048087 | Senescence-signaling | 99 | 100 | 99.00 | ['Panel', 'Condition', 'CellLine', 'Rep', 'Bat... | L | stress | rep2 | Batch1 |
193 rows × 10 columns
Percentage out of the tiles that passed the previous filter. Absolute values are given in parentheses (if they differ from the percentages).
# Target-marker survival after the tile-level Brenner filter, using the DAPI
# tile-level Brenner result as the reference.
show_site_survival_target_brenner_tiles(
    df_dapi,
    df_target,
    dapi_filter_by_brenner_tiles,
    figsize=(6, 24),
    markers=markers,
)
names = ['Total number of tiles', 'Total number of whole cells']
# Columns handed to calc_total_sums.
stats = [
    'n_valid_tiles_after_tiles_brenner',
    'site_whole_cells_counts_sum',
    'site_cell_count',
    'site_cell_count_sum',
]
total_sum = calc_total_sums(df_target, df_dapi, stats, markers)
# Expose the post-Brenner tile count under the shorter name used from here on.
total_sum = total_sum.rename(
    columns={'n_valid_tiles_after_tiles_brenner': 'n_valid_tiles'}
)
plot_marker_data(total_sum, split_by_cell_line=True)
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
# Repeat the marker plot, this time without splitting by cell line.
plot_marker_data(total_sum, split_by_cell_line=False)
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1348: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. subset = data[data['marker'].isin(markers)] if markers else data[~data['marker'].isin(['DAPI', 'TUJ1'])]
# Total number of valid tiles over all rows of total_sum.
total_sum['n_valid_tiles'].sum()
642854.0
# Sum of the whole-cell counts over the DAPI rows only.
total_sum.loc[total_sum['marker'] == 'DAPI', 'site_whole_cells_counts_sum'].sum()
242504.0
# Sum of the site cell counts over the DAPI rows only.
total_sum.loc[total_sum['marker'] == 'DAPI', 'site_cell_count'].sum()
701543.0
# Display the summary tables of the totals.
show_total_sum_tables(total_sum)
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| Batch3 | ||||
| count | 1184.000000 | 1184.000000 | 1184.000000 | 1184.000000 |
| mean | 226.974662 | 2.269747 | 253.137669 | 745.097973 |
| std | 185.465568 | 1.854656 | 207.887894 | 648.245738 |
| min | 9.000000 | 0.090000 | 9.000000 | 19.000000 |
| 25% | 84.000000 | 0.840000 | 96.750000 | 244.000000 |
| 50% | 178.000000 | 1.780000 | 198.500000 | 565.000000 |
| 75% | 323.250000 | 3.232500 | 355.250000 | 1053.250000 |
| max | 1022.000000 | 10.220000 | 1178.000000 | 3673.000000 |
| sum | 268738.000000 | NaN | 299715.000000 | 882196.000000 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 450.000000 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| Batch4 | ||||
| count | 1184.000000 | 1184.000000 | 1184.000000 | 1.184000e+03 |
| mean | 315.976351 | 3.159764 | 349.445946 | 9.770194e+02 |
| std | 230.875751 | 2.308758 | 258.549280 | 7.640148e+02 |
| min | 11.000000 | 0.110000 | 11.000000 | 2.500000e+01 |
| 25% | 134.000000 | 1.340000 | 148.000000 | 3.917500e+02 |
| 50% | 252.000000 | 2.520000 | 278.000000 | 7.355000e+02 |
| 75% | 451.000000 | 4.510000 | 499.250000 | 1.406750e+03 |
| max | 1070.000000 | 10.700000 | 1270.000000 | 3.602000e+03 |
| sum | 374116.000000 | NaN | 413744.000000 | 1.156791e+06 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 4.500000e+02 |
| n valid tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| All batches | ||||
| count | 2368.000000 | 2368.000000 | 2368.000000 | 2.368000e+03 |
| mean | 271.475507 | 2.714755 | 301.291807 | 8.610587e+02 |
| std | 214.040138 | 2.140401 | 239.434968 | 7.177821e+02 |
| min | 9.000000 | 0.090000 | 9.000000 | 1.900000e+01 |
| 25% | 104.750000 | 1.047500 | 117.000000 | 3.050000e+02 |
| 50% | 218.000000 | 2.180000 | 238.000000 | 6.460000e+02 |
| 75% | 382.000000 | 3.820000 | 418.000000 | 1.218000e+03 |
| max | 1070.000000 | 10.700000 | 1270.000000 | 3.673000e+03 |
| sum | 642854.000000 | NaN | 713459.000000 | 2.038987e+06 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 4.500000e+02 |
For each batch, cell line, replicate and marker: total number of tiles
# The heatmap helper is given the plotted value in a column called 'index'.
to_heatmap = total_sum.rename(columns={'n_valid_tiles': 'index'})
plot_filtering_heatmap(
    to_heatmap,
    extra_index='marker',
    vmin=None,
    vmax=None,
    xlabel='Total number of tiles',
    show_sum=True,
    figsize=(6, 24),
)
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:388: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(ax.get_yticklabels(), fontsize=10) /home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:388: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(ax.get_yticklabels(), fontsize=10)
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:388: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(ax.get_yticklabels(), fontsize=10) /home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:388: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(ax.get_yticklabels(), fontsize=10)
For each batch, cell line, replicate and marker: total number of whole cells
# The heatmap helper is given the plotted value in a column called 'index'.
to_heatmap = total_sum.rename(columns={'site_whole_cells_counts_sum': 'index'})
plot_filtering_heatmap(
    to_heatmap,
    extra_index='marker',
    vmin=None,
    vmax=None,
    xlabel='Total number of whole cells',
    show_sum=True,
    figsize=(6, 24),
)
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:388: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(ax.get_yticklabels(), fontsize=10) /home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:388: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(ax.get_yticklabels(), fontsize=10)
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:388: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(ax.get_yticklabels(), fontsize=10) /home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:388: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(ax.get_yticklabels(), fontsize=10)
# Drop the DAPI sites whose valid-tile count after the tile-level Brenner
# filter is zero.
df_no_empty_sites = df_dapi[df_dapi['n_valid_tiles_after_tiles_brenner'] != 0]
# One plot per cell-count column, drawn in this order.
cell_count_plots = [
    ('site_cell_count_sum', 'Cell Count Average per Site (from tiles)'),
    ('site_whole_cells_counts_sum', 'Whole Cell Count Average per Site'),
    ('site_cell_count', 'Cellpose Cell Count Average per Site'),
]
for y_column, plot_title in cell_count_plots:
    plot_cell_count(
        df_no_empty_sites,
        lines_order,
        custom_palette,
        y=y_column,
        title=plot_title,
        figsize=(16, 6),
    )
# Mean number of valid tiles (after the tile-level Brenner filter) per
# cell line and condition.
df_dapi.groupby('cell_line_cond')['n_valid_tiles_after_tiles_brenner'].mean()
cell_line_cond C9orf72-HRE-1008566 Untreated 1.376946 C9orf72-HRE-1008566 stress 0.951932 C9orf72-HRE-981344 Untreated 4.197832 C9orf72-HRE-981344 stress 3.952741 Control-1001733 Untreated 0.868120 Control-1001733 stress 0.638870 Control-1017118 Untreated 3.946600 Control-1017118 stress 3.331590 Control-1025045 Untreated 7.346146 Control-1025045 stress 6.441826 Control-1048087 Untreated 1.981112 Control-1048087 stress 1.837677 TDP--43-G348V-1057052 Untreated 2.024302 TDP--43-G348V-1057052 stress 1.341531 TDP--43-N390D-1005373 Untreated 2.835388 TDP--43-N390D-1005373 stress 2.166033 Name: n_valid_tiles_after_tiles_brenner, dtype: float64
# Valid-tile counts per cell line, coloured by replicate.
plot_catplot(
    df_dapi,
    sns.color_palette('colorblind'),
    reps=reps,
    x='cell_line',
    y_title='Valid Tiles Count',
    x_title='Cell Line',
    y='n_valid_tiles_after_tiles_brenner',
    hue='rep',
    height=4,
    aspect=2,
)
/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1028: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df.loc[:, 'batch_rep'] = df['batch'] + " " + df['rep'] /home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1039: UserWarning: The palette list has more values (10) than needed (2), which may not be intended. g = sns.catplot(kind='box', data=df, y=y, x=x,height=height, aspect=aspect, hue=hue, palette=palette,
# plot_hm(df_dapi, split_by='rep', rows='cell_line', columns='panel', vmax=3)
# for batch in batches:
# print(batch)
# run_calc_hist_new(batch,cell_lines_for_disp, markers, root_directory_raw, root_directory_proc,
# hist_sample=10,sample_size_per_markers=10, ncols=4, nrows=1, figsize=(6,2))
# print("="*30)
# Save the notebook and export it as HTML into the manuscript QC-reports folder.
import subprocess

# Ask the notebook front end to save a checkpoint before converting.
# NOTE(review): the save is requested through front-end JavaScript and is not
# awaited here -- confirm the notebook is saved before the export runs.
display(Javascript('IPython.notebook.save_checkpoint();'))

# The notebook path is relative to the working directory (NOVA_HOME).
notebook_path = 'tools/preprocessing_tools/qc_reports/qc_report_funova_exp4.ipynb'
html_path = f'{NOVA_HOME}/manuscript/preprocessing_qc_reports/qc_report_funova_exp4_11.03.25.html'
# Pass the command as an argument list so that no shell parses the paths.
# The exit status is the cell's value, as it was with os.system.
subprocess.run(
    ['jupyter', 'nbconvert', '--to', 'html', notebook_path, '--output', html_path],
    check=False,
).returncode
[NbConvertApp] Converting notebook tools/preprocessing_tools/qc_reports/qc_report_funova_exp4.ipynb to html [NbConvertApp] Writing 19687596 bytes to /home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA/manuscript/preprocessing_qc_reports/qc_report_funova_exp4_11.03.25.html
0